/*
This do-file merges the country allocation database and the funds flows database. 
Then it transforms into panel format.
*/

***************************************************************************
* import weekly flows data
use "$datapath/__EPFRdata_wflows_p.dta", clear
* check reported assets under management corresponds to what it is supposed to be
gen assetend_check = l.assetsend*(1+portfoliochangepct/100) + forexchange + flow
order assetend_check flow assetsend datew forexchange portfoliochangepct 
***************************************************************************

***************************************************************************
* keep US euity funds using US dollar as currency
keep if funddomicile == "USA"
keep if fundcurrency == "USD"
***************************************************************************

***************************************************************************
* weekly returns to monthly returns
gen distance = datem - 489

foreach v of varlist portfoliochangepct {
	bys distance  fundid (datew): gen monthly`v' = (1+`v'[1]/100)
	bys distance  fundid (datew): replace monthly`v' = monthly`v'[_n-1]*(1+`v'[_n]/100) if _n > 1 
}

foreach v of varlist monthly* {
	qui replace `v' = `v' - 1
}
***************************************************************************

***************************************************************************
* dimensions: months and funds from dimensions weeks and funds
bys fundid (datem) : keep if datem != datem[_n+1]
format datem %tm
***************************************************************************

***************************************************************************
* Merge with monthly country allocations
merge 1:1 fundid datem using "$datapath/__EPFRdata_mallocations_p.dta"
keep if _merge == 3
xtset fundid datem
keep if assetclass2 == "Equity"
***************************************************************************

***************************************************************************
* time sample
keep if datem <= ym(2016,7)
keep if datem > ym(2001,12)
***************************************************************************

***************************************************************************
* drop funds reporting less than 12 months and small funds
gen aum = assetsend

bys fundid  (datem) : gen obs1 = _n
bys fundid  (datem) : gen N = _N
order N

gen lastaum = aum if obs1 == N
forvalues j = 1/190 {
	bys fundid  (datem) : replace lastaum = lastaum[_n+1] if !missing(lastaum[_n+1]) 
}

drop if lastaum < 5 
drop if N < 12
***************************************************************************

***************************************************************************
*** some funds do not report continuously. We drop subperiod allocations in which there are less than 12 consecutive months. 

* track entries and exits
tsset fundid datem
tsfill, full

// entering funds
bys fundid (datem) : gen status = "there" if fundname[_n] != "" // firm already reports
order datem fundid fundname status
/* first observation before status = 1 is entry */
bys fundid (datem) : replace status = "Entry" if status[_n-1] == "" & status[_n+1] == "there" & status[_n] == "there"
/* last observation of status  is exit */
bys fundid (datem) : replace status = "Exit" if status[_n-1] == "there" & status[_n+1] == "" & status[_n] == "there"

* blank between exit and entry is there after interpolation
bys fundid (datem) : replace status = "there" if status[_n+1] == "Entry" & status[_n-1] == "Exit"

* blank between there and entry is there after interpolation
bys fundid (datem) : replace status = "there" if status[_n+1] == "Entry" & status[_n-1] == "there"

* blank between there and there is there after interpolation
bys fundid (datem) : replace status = "there" if status[_n+1] == "there" & status[_n-1] == "there"

/* last observation of status  is exit */
bys fundid (datem) : replace status = "Exit" if status[_n-1] == "there" & status[_n+1] == "" & status[_n] == "there"

* fund 1719
replace status = "there" if fundid == 1719 & datem == ym(2003,9)
replace status = "there" if fundid == 1719 & datem == ym(2003,10)
replace status = "there" if fundid == 1719 & datem == ym(2003,11)
replace status = "there" if fundid == 1719 & datem == ym(2003,12)

* blank between Exit and there is there after interpolation
bys fundid (datem) : replace status = "there" if status[_n+1] == "there" & status[_n-1] == "Exit"
bys fundid (datem) : replace status = "there" if status[_n+1] == "there" & status[_n-1] == "there"
bys fundid (datem) : replace status = "Exit" if status[_n-1] == "there" & status[_n+1] == "" & status[_n] == "there"

* count number of entries / exits
gen entry = 1 if status == "Entry"
gen exit = 1 if status == "Exit"  
replace exit = . if status == "Exit" & datem == ym(2016,7)
bys fundid : egen fentrycount = count(entry)
bys fundid : egen fexitcount = count(exit)

ta fundid if fentrycount > 1

drop if fundid == 3914 & datem < ym(2006,5)
drop if fundid == 13521 & datem > ym(2015,5)
drop if fundid == 21539 & datem < ym(2011,12)

* recount number of entries and exits
drop f*count entry exit
gen entry = 1 if status == "Entry"
gen exit = 1 if status == "Exit"  
replace exit = . if status == "Exit" & datem == ym(2016,7)
bys fundid : egen fentrycount = count(entry)
bys fundid : egen fexitcount = count(exit)
/*
ALL good for entries
*/

ta fundid if fexitcount > 1 
/*
ALL good for exits > 1
*/

* verify N 
drop if status == ""
bys fundid (datem): gen obs2 = _N 
***************************************************************************

***************************************************************************
* Interpolation: some funds de not report for a month or two. We interpolate the values by averaging the past and next periods.

* interpolation is average betwenn ca + 1 and ca - 1 for numeric variable
foreach v of varlist assetsend fundsize monthlyportfoliochangepct cash albania-zimbabwe {
	bys fundid (datem) : replace `v' = (`v'[_n+1] + `v'[_n-1])/2 if status == "there" & missing(`v')
}

* fund 1719
foreach v of varlist assetsend fundsize monthlyportfoliochangepct cash albania-zimbabwe {
	replace `v' = (`v'[_n+2] + `v'[_n-1])/2 if status == "there" & missing(`v') & fundid == 1719 & datem == ym(2003,10)
}
foreach v of varlist assetsend fundsize monthlyportfoliochangepct cash albania-zimbabwe {
	replace `v' = (`v'[_n+1] + `v'[_n-1])/2 if status == "there" & missing(`v') & fundid == 1719 & datem == ym(2003,11)
}
***************************************************************************

***************************************************************************
*** Tranform into panel
* rename countries with prefix
foreach v of varlist albania algeria angola argentina australia austria azerbaijan bahrain balticrepublics bangladesh belarus belgium bolivia bosniaherzegovina botswana brazil bulgaria cambodia canada chile china colombia congokinshasa costarica croatia cuba cyprus czechrepublic denmark dominicanrepublic ecuador egypt elsalvador estonia ethiopia finland france gabon georgia germany ghana greece guatemala honduras hongkong hungary iceland india indonesia iran iraq ireland israel italy ivorycoast jamaica japan jordan kazakhstan kenya koreanorth koreasouth kuwait latvia lebanon liberia libya lithuania macedonia madagascar malawi malaysia mauritius mexico moldova mongolia morocco mozambique myanmar namibia netherlands newzealand nicaragua nigeria norway oman otherasia otherbonds otherequity othereurope otherlatinamerica othermiddleeastandafrica pakistan panama papuanewguinea paraguay peru philippines poland portugal qatar romania russianfederation rwanda saudiarabia serbia sierraleone singapore slovakia slovenia southafrica spain srilanka swaziland sweden switzerland taiwan tajikistan tanzania thailand trinidadandtobago tunisia turkey turkmenistan uganda ukraine unitedarabemirates unitedkingdom uruguay unitedstates venezuela vietnam yemen zambia zimbabwe {
	qui rename `v' country_`v'
}

* reshape data
reshape long country_ , i(datem fundid) j(cr) string

* rename variables
rename cr country
sencode country, gen(id)
drop _merge
rename country_ ca

***************************************************************************
order datem fundid country id
sort country fundid datem

save "$datapath/__EPFRdata__4__Main__Results.dta", replace
* end
***************************************************************************